
		**********************************************************
		** File: Figure F3.do									**
		** Paper: Human Trafficking Indicators: A New Dataset	**
		** Author: Richard Frank								**
		** Date: July 11, 2021									**
		** Task: Creating Figure F3								**
 		**********************************************************
	


	* Session setup: pin the Stata version, start from an empty dataset,
	* and set the random-number seed (no random-number command is visible
	* in this part of the script; the setting is kept as in the original).
	version 16.1
	clear
	set seed 1234

	* All yearly TIP text files are read relative to this directory.
	cd "/Users/rich/Dropbox/Data/HT/HT TIP text/Parsehub/"

	* Stack the yearly files 2001-2017 into one dataset: load the first
	* year, then append each later year in ascending order.
	use "TIP_2001_alltext.dta"
	forvalues yr = 2002/2017 {
		append using "TIP_`yr'_alltext.dta"
	}
 	
	
	* Where an intro text exists and the profile text is empty, use the
	* intro text as the profile.
	* NOTE(review): "traffic" is a variable abbreviation, presumably of
	* trafficking_profile -- confirm against the source datasets.
	replace trafficking_profile = intro if traffic == "" & intro != ""
	keep country year trafficking_profile alltext

	* cow.do is run while the country variable is named Country; the
	* lower-case name is restored afterwards.
	* NOTE(review): cow.do presumably creates ccode, which is used in the
	* commands that follow -- confirm.
	rename country Country
	run "cow.do"
	label variable year "Year"

	* Tabulate the countries whose ccode is 0, then all countries, as a
	* check before the ccode == 0 observations are dropped.
	tabulate Country if ccode == 0
	tabulate Country
	rename Country country

	* Remove observations whose ccode is 0.
	drop if ccode == 0
	
	* Word counts of the trafficking-profile text and of the full report text.
	generate profile_words = wordcount(trafficking_profile)
	generate all_words = wordcount(alltext)

	sort profile_words

	* Manual corrections to profile_words. The replacement values are
	* hard-coded by the author. Variable names are written out in full so
	* the corrections do not depend on variable-abbreviation matching.

	* Fixing missing values: Serbia and Montenegro are both in the same
	* 2005 report, so both receive the same count.
	* Serbia
	replace profile_words = 106 if ccode == 345 & year == 2005
	* Montenegro
	replace profile_words = 106 if ccode == 341 & year == 2005

	* Outliers are Denmark in 2008 and Somalia in 2013.
	* Denmark
	replace profile_words = 57 if ccode == 390 & year == 2008

	* Diagnostic box plot: Denmark is corrected at this point, Somalia is not yet.
	graph box profile_words, over(year)

	* Fixing Somalia
	replace profile_words = 1150 if ccode == 520 & year == 2013
 
	
		* Draw the yearly box plot for each word-count variable and store it
		* on disk as it is drawn (overwriting any existing file), then stack
		* the two stored graphs in a single column.
		graph box profile_words, over(year) saving(profilewords.gph, replace)
		graph box all_words, over(year) saving(allwords.gph, replace)
		graph combine profilewords.gph allwords.gph, cols(1)
	
 
 
